from constants import *
from utils import evaluate_model_policy, plot_study, plot_fig
from trainer import get_trained_model
import optuna
from environment import StreetFighterEnv
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from actor_critic import A2CCNNPolicy
from feature_extractors import CNNExtractorWithAttention, CNNExtractor
from tuner import Tuner
import os
from layers import ActorCriticLayer
# Shared run settings for the tuning experiments below.
# NOTE(review): both budgets are tiny -- presumably a quick smoke run rather
# than a real hyper-parameter search; confirm before relying on the results.
N_TRIALS = 2    # passed as n_trials= to Tuner.tune_study
TIMESTEPS = 1   # passed as timesteps= to the Tuner constructor

# Passed as config= to every plot.show(...) call.
PLOTLY_CONFIG = dict(staticPlot=True)
# Experiment 1: tune A2C on a StreetFighterEnv built with
# capture_movement=False; 'models/bias' is handed to the Tuner as save_dir.
model_dir = 'models/bias'
model = A2C
policy_network = A2CCNNPolicy
env = StreetFighterEnv(capture_movement=False)

# Forwarded to the Tuner as policy_args: selects the attention-based CNN
# feature extractor (features_dim=512) and the ActorCriticLayer class.
policy_kwargs = {
    "features_extractor_class": CNNExtractorWithAttention,
    "features_extractor_kwargs": {"features_dim": 512},
    "actor_critic_class": ActorCriticLayer,
}

tuner = Tuner(
    model=model,
    env=env,
    policy_network=policy_network,
    policy_args=policy_kwargs,
    timesteps=TIMESTEPS,
    save_dir=model_dir,
)
study = tuner.tune_study(n_trials=N_TRIALS)

# Bare expression: as the last statement of a notebook cell it displays the
# best trial's number together with its parameters.
(study.best_trial.number, study.best_params)
[I 2022-04-18 02:53:33,101] A new study created in memory with name: no-name-fc90ce01-7d09-4e69-b0fc-c15454923034
[I 2022-04-18 02:54:20,148] Trial 0 finished with value: 2000.0 and parameters: {'gamma': 0.9387100479431529, 'learning_rate': 6.0863300595864514e-05, 'gae_lambda': 0.9611482279481803}. Best is trial 0 with value: 2000.0.
[I 2022-04-18 02:54:57,774] Trial 1 finished with value: 0.0 and parameters: {'gamma': 0.9463205445946552, 'learning_rate': 3.0297366046169837e-05, 'gae_lambda': 0.9183136951337001}. Best is trial 0 with value: 2000.0.
(0,
{'gamma': 0.9387100479431529,
'learning_rate': 6.0863300595864514e-05,
'gae_lambda': 0.9611482279481803})
# Build the figures for the study that was just tuned and render each one.
# NOTE(review): plot_study comes from utils; it presumably returns an iterable
# of Plotly figures (the .show(renderer, config=...) call matches that API) --
# confirm against utils.plot_study.
plots = plot_study(study)
for plot in plots:
    # The loop body must be indented to belong to the `for` suite; "notebook"
    # is passed as the renderer and PLOTLY_CONFIG as the display config.
    plot.show("notebook", config=PLOTLY_CONFIG)
# Experiment 2: same A2C setup, but the StreetFighterEnv is built with
# capture_movement=True and 'models/bias_with_movement' is the Tuner save_dir.
model_dir = 'models/bias_with_movement'
model = A2C
policy_network = A2CCNNPolicy
env = StreetFighterEnv(capture_movement=True)

# Forwarded to the Tuner as policy_args: selects the attention-based CNN
# feature extractor (features_dim=512) and the ActorCriticLayer class.
policy_kwargs = {
    "features_extractor_class": CNNExtractorWithAttention,
    "features_extractor_kwargs": {"features_dim": 512},
    "actor_critic_class": ActorCriticLayer,
}

tuner = Tuner(
    model=model,
    env=env,
    policy_network=policy_network,
    policy_args=policy_kwargs,
    timesteps=TIMESTEPS,
    save_dir=model_dir,
)
study = tuner.tune_study(n_trials=N_TRIALS)

# Bare expression: as the last statement of a notebook cell it displays the
# best trial's number together with its parameters.
(study.best_trial.number, study.best_params)
[I 2022-04-18 02:54:58,118] A new study created in memory with name: no-name-06134959-136c-4993-9490-b930199b1118
[I 2022-04-18 02:55:39,874] Trial 0 finished with value: 0.0 and parameters: {'gamma': 0.9260040768135016, 'learning_rate': 1.4554401024388566e-05, 'gae_lambda': 0.9577847240942253}. Best is trial 0 with value: 0.0.
[I 2022-04-18 02:56:22,498] Trial 1 finished with value: 0.0 and parameters: {'gamma': 0.877822828317454, 'learning_rate': 2.2712786375443775e-05, 'gae_lambda': 0.8850564051231818}. Best is trial 0 with value: 0.0.
(0,
{'gamma': 0.9260040768135016,
'learning_rate': 1.4554401024388566e-05,
'gae_lambda': 0.9577847240942253})
# Build the figures for the most recent study (the capture_movement=True run
# when executed in file order) and render each one.
# NOTE(review): plot_study comes from utils; it presumably returns an iterable
# of Plotly figures (the .show(renderer, config=...) call matches that API) --
# confirm against utils.plot_study.
plots = plot_study(study)
for plot in plots:
    # The loop body must be indented to belong to the `for` suite; "notebook"
    # is passed as the renderer and PLOTLY_CONFIG as the display config.
    plot.show("notebook", config=PLOTLY_CONFIG)